JMJPFU 28-Feb-2020 This is the notebook for exploratory data analysis for the Churn project. Lord bless this attempt of yours
# Bind the existing telecom churn table to a shorter working name. The source
# object's name is non-syntactic (it contains "." and parentheses), so it has
# to stay wrapped in backticks.
churnData <- `telecom_churn_data.(1)`
# Preview the first ten rows. head() returns only the rows that exist, whereas
# indexing with 1:10 pads a shorter table with all-NA rows.
head(churnData, 10)
Let us look at the different columns and pick some of them for verification
# List every column name of churnData so that columns can be picked for
# verification.
colnames(churnData)
[1] "mobile_number" "circle_id" "loc_og_t2o_mou"
[4] "std_og_t2o_mou" "loc_ic_t2o_mou" "last_date_of_month_6"
[7] "last_date_of_month_7" "last_date_of_month_8" "last_date_of_month_9"
[10] "arpu_6" "arpu_7" "arpu_8"
[13] "arpu_9" "onnet_mou_6" "onnet_mou_7"
[16] "onnet_mou_8" "onnet_mou_9" "offnet_mou_6"
[19] "offnet_mou_7" "offnet_mou_8" "offnet_mou_9"
[22] "roam_ic_mou_6" "roam_ic_mou_7" "roam_ic_mou_8"
[25] "roam_ic_mou_9" "roam_og_mou_6" "roam_og_mou_7"
[28] "roam_og_mou_8" "roam_og_mou_9" "loc_og_t2t_mou_6"
[31] "loc_og_t2t_mou_7" "loc_og_t2t_mou_8" "loc_og_t2t_mou_9"
[34] "loc_og_t2m_mou_6" "loc_og_t2m_mou_7" "loc_og_t2m_mou_8"
[37] "loc_og_t2m_mou_9" "loc_og_t2f_mou_6" "loc_og_t2f_mou_7"
[40] "loc_og_t2f_mou_8" "loc_og_t2f_mou_9" "loc_og_t2c_mou_6"
[43] "loc_og_t2c_mou_7" "loc_og_t2c_mou_8" "loc_og_t2c_mou_9"
[46] "loc_og_mou_6" "loc_og_mou_7" "loc_og_mou_8"
[49] "loc_og_mou_9" "std_og_t2t_mou_6" "std_og_t2t_mou_7"
[52] "std_og_t2t_mou_8" "std_og_t2t_mou_9" "std_og_t2m_mou_6"
[55] "std_og_t2m_mou_7" "std_og_t2m_mou_8" "std_og_t2m_mou_9"
[58] "std_og_t2f_mou_6" "std_og_t2f_mou_7" "std_og_t2f_mou_8"
[61] "std_og_t2f_mou_9" "std_og_t2c_mou_6" "std_og_t2c_mou_7"
[64] "std_og_t2c_mou_8" "std_og_t2c_mou_9" "std_og_mou_6"
[67] "std_og_mou_7" "std_og_mou_8" "std_og_mou_9"
[70] "isd_og_mou_6" "isd_og_mou_7" "isd_og_mou_8"
[73] "isd_og_mou_9" "spl_og_mou_6" "spl_og_mou_7"
[76] "spl_og_mou_8" "spl_og_mou_9" "og_others_6"
[79] "og_others_7" "og_others_8" "og_others_9"
[82] "total_og_mou_6" "total_og_mou_7" "total_og_mou_8"
[85] "total_og_mou_9" "loc_ic_t2t_mou_6" "loc_ic_t2t_mou_7"
[88] "loc_ic_t2t_mou_8" "loc_ic_t2t_mou_9" "loc_ic_t2m_mou_6"
[91] "loc_ic_t2m_mou_7" "loc_ic_t2m_mou_8" "loc_ic_t2m_mou_9"
[94] "loc_ic_t2f_mou_6" "loc_ic_t2f_mou_7" "loc_ic_t2f_mou_8"
[97] "loc_ic_t2f_mou_9" "loc_ic_mou_6" "loc_ic_mou_7"
[100] "loc_ic_mou_8" "loc_ic_mou_9" "std_ic_t2t_mou_6"
[103] "std_ic_t2t_mou_7" "std_ic_t2t_mou_8" "std_ic_t2t_mou_9"
[106] "std_ic_t2m_mou_6" "std_ic_t2m_mou_7" "std_ic_t2m_mou_8"
[109] "std_ic_t2m_mou_9" "std_ic_t2f_mou_6" "std_ic_t2f_mou_7"
[112] "std_ic_t2f_mou_8" "std_ic_t2f_mou_9" "std_ic_t2o_mou_6"
[115] "std_ic_t2o_mou_7" "std_ic_t2o_mou_8" "std_ic_t2o_mou_9"
[118] "std_ic_mou_6" "std_ic_mou_7" "std_ic_mou_8"
[121] "std_ic_mou_9" "total_ic_mou_6" "total_ic_mou_7"
[124] "total_ic_mou_8" "total_ic_mou_9" "spl_ic_mou_6"
[127] "spl_ic_mou_7" "spl_ic_mou_8" "spl_ic_mou_9"
[130] "isd_ic_mou_6" "isd_ic_mou_7" "isd_ic_mou_8"
[133] "isd_ic_mou_9" "ic_others_6" "ic_others_7"
[136] "ic_others_8" "ic_others_9" "total_rech_num_6"
[139] "total_rech_num_7" "total_rech_num_8" "total_rech_num_9"
[142] "total_rech_amt_6" "total_rech_amt_7" "total_rech_amt_8"
[145] "total_rech_amt_9" "max_rech_amt_6" "max_rech_amt_7"
[148] "max_rech_amt_8" "max_rech_amt_9" "date_of_last_rech_6"
[151] "date_of_last_rech_7" "date_of_last_rech_8" "date_of_last_rech_9"
[154] "last_day_rch_amt_6" "last_day_rch_amt_7" "last_day_rch_amt_8"
[157] "last_day_rch_amt_9" "date_of_last_rech_data_6" "date_of_last_rech_data_7"
[160] "date_of_last_rech_data_8" "date_of_last_rech_data_9" "total_rech_data_6"
[163] "total_rech_data_7" "total_rech_data_8" "total_rech_data_9"
[166] "max_rech_data_6" "max_rech_data_7" "max_rech_data_8"
[169] "max_rech_data_9" "count_rech_2g_6" "count_rech_2g_7"
[172] "count_rech_2g_8" "count_rech_2g_9" "count_rech_3g_6"
[175] "count_rech_3g_7" "count_rech_3g_8" "count_rech_3g_9"
[178] "av_rech_amt_data_6" "av_rech_amt_data_7" "av_rech_amt_data_8"
[181] "av_rech_amt_data_9" "vol_2g_mb_6" "vol_2g_mb_7"
[184] "vol_2g_mb_8" "vol_2g_mb_9" "vol_3g_mb_6"
[187] "vol_3g_mb_7" "vol_3g_mb_8" "vol_3g_mb_9"
[190] "arpu_3g_6" "arpu_3g_7" "arpu_3g_8"
[193] "arpu_3g_9" "arpu_2g_6" "arpu_2g_7"
[196] "arpu_2g_8" "arpu_2g_9" "night_pck_user_6"
[199] "night_pck_user_7" "night_pck_user_8" "night_pck_user_9"
[202] "monthly_2g_6" "monthly_2g_7" "monthly_2g_8"
[205] "monthly_2g_9" "sachet_2g_6" "sachet_2g_7"
[208] "sachet_2g_8" "sachet_2g_9" "monthly_3g_6"
[211] "monthly_3g_7" "monthly_3g_8" "monthly_3g_9"
[214] "sachet_3g_6" "sachet_3g_7" "sachet_3g_8"
[217] "sachet_3g_9" "fb_user_6" "fb_user_7"
[220] "fb_user_8" "fb_user_9" "aon"
[223] "aug_vbc_3g" "jul_vbc_3g" "jun_vbc_3g"
[226] "sep_vbc_3g"
# Print a per-column summary of the full data set (numeric and non-numeric
# columns alike).
summary(churnData)
mobile_number circle_id loc_og_t2o_mou std_og_t2o_mou loc_ic_t2o_mou last_date_of_month_6
Min. :7.000e+09 Min. :109 Min. :0 Min. :0 Min. :0 6/30/2014:99999
1st Qu.:7.001e+09 1st Qu.:109 1st Qu.:0 1st Qu.:0 1st Qu.:0
Median :7.001e+09 Median :109 Median :0 Median :0 Median :0
Mean :7.001e+09 Mean :109 Mean :0 Mean :0 Mean :0
last_date_of_month_7 last_date_of_month_8 last_date_of_month_9 arpu_6 arpu_7
: 601 : 1100 : 1659 Min. :-2258.71 Min. :-2014.05
7/31/2014:99398 8/31/2014:98899 9/30/2014:98340 1st Qu.: 93.41 1st Qu.: 86.98
Median : 197.70 Median : 191.64
Mean : 282.99 Mean : 278.54
arpu_8 arpu_9 onnet_mou_6 onnet_mou_7 onnet_mou_8
Min. : -945.81 Min. :-1899.51 Min. : 0.00 Min. : 0.00 Min. : 0.00
1st Qu.: 84.13 1st Qu.: 62.69 1st Qu.: 7.38 1st Qu.: 6.66 1st Qu.: 6.46
Median : 192.08 Median : 176.85 Median : 34.31 Median : 32.33 Median : 32.36
Mean : 279.15 Mean : 261.65 Mean : 132.40 Mean : 133.67 Mean : 133.02
onnet_mou_9 offnet_mou_6 offnet_mou_7 offnet_mou_8 offnet_mou_9
Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.00
1st Qu.: 5.33 1st Qu.: 34.73 1st Qu.: 32.19 1st Qu.: 31.63 1st Qu.: 27.13
Median : 29.84 Median : 96.31 Median : 91.73 Median : 92.14 Median : 87.29
Mean : 130.30 Mean : 197.94 Mean : 197.04 Mean : 196.57 Mean : 190.34
roam_ic_mou_6 roam_ic_mou_7 roam_ic_mou_8 roam_ic_mou_9 roam_og_mou_6
Min. : 0.00 Min. : 0.00 Min. : 0.000 Min. : 0.000 Min. : 0.00
1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.00
Median : 0.00 Median : 0.00 Median : 0.000 Median : 0.000 Median : 0.00
Mean : 9.95 Mean : 7.15 Mean : 7.293 Mean : 6.344 Mean : 13.91
roam_og_mou_7 roam_og_mou_8 roam_og_mou_9 loc_og_t2t_mou_6 loc_og_t2t_mou_7
Min. : 0.000 Min. : 0.000 Min. : 0.000 Min. : 0.00 Min. : 0.00
1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 1.66 1st Qu.: 1.63
Median : 0.000 Median : 0.000 Median : 0.000 Median : 11.91 Median : 11.61
Mean : 9.819 Mean : 9.972 Mean : 8.556 Mean : 47.10 Mean : 46.47
loc_og_t2t_mou_8 loc_og_t2t_mou_9 loc_og_t2m_mou_6 loc_og_t2m_mou_7 loc_og_t2m_mou_8
Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.00
1st Qu.: 1.60 1st Qu.: 1.36 1st Qu.: 9.88 1st Qu.: 10.03 1st Qu.: 9.81
Median : 11.73 Median : 11.26 Median : 41.03 Median : 40.43 Median : 40.36
Mean : 45.89 Mean : 44.58 Mean : 93.34 Mean : 91.40 Mean : 91.75
loc_og_t2m_mou_9 loc_og_t2f_mou_6 loc_og_t2f_mou_7 loc_og_t2f_mou_8 loc_og_t2f_mou_9
Min. : 0.00 Min. : 0.000 Min. : 0.000 Min. : 0.000 Min. : 0.000
1st Qu.: 8.81 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.000
Median : 39.12 Median : 0.000 Median : 0.000 Median : 0.000 Median : 0.000
Mean : 90.46 Mean : 3.751 Mean : 3.793 Mean : 3.678 Mean : 3.655
loc_og_t2c_mou_6 loc_og_t2c_mou_7 loc_og_t2c_mou_8 loc_og_t2c_mou_9 loc_og_mou_6
Min. : 0.000 Min. : 0.000 Min. : 0.000 Min. : 0.000 Min. : 0.00
1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 17.11
Median : 0.000 Median : 0.000 Median : 0.000 Median : 0.000 Median : 65.11
Mean : 1.123 Mean : 1.369 Mean : 1.434 Mean : 1.233 Mean : 144.20
loc_og_mou_7 loc_og_mou_8 loc_og_mou_9 std_og_t2t_mou_6 std_og_t2t_mou_7
Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.00
1st Qu.: 17.48 1st Qu.: 17.11 1st Qu.: 15.56 1st Qu.: 0.00 1st Qu.: 0.00
Median : 63.69 Median : 63.73 Median : 61.84 Median : 0.00 Median : 0.00
Mean : 141.67 Mean : 141.33 Mean : 138.71 Mean : 79.83 Mean : 83.30
std_og_t2t_mou_8 std_og_t2t_mou_9 std_og_t2m_mou_6 std_og_t2m_mou_7 std_og_t2m_mou_8
Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.000 Min. : 0.00
1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.000 1st Qu.: 0.00
Median : 0.00 Median : 0.00 Median : 3.95 Median : 3.635 Median : 3.31
Mean : 83.28 Mean : 82.34 Mean : 87.30 Mean : 90.804 Mean : 89.84
std_og_t2m_mou_9 std_og_t2f_mou_6 std_og_t2f_mou_7 std_og_t2f_mou_8 std_og_t2f_mou_9
Min. : 0.00 Min. : 0.000 Min. : 0.000 Min. : 0.000 Min. : 0.000
1st Qu.: 0.00 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.000
Median : 2.50 Median : 0.000 Median : 0.000 Median : 0.000 Median : 0.000
Mean : 86.28 Mean : 1.129 Mean : 1.115 Mean : 1.068 Mean : 1.042
std_og_t2c_mou_6 std_og_t2c_mou_7 std_og_t2c_mou_8 std_og_t2c_mou_9 std_og_mou_6 std_og_mou_7
Min. :0 Min. :0 Min. :0 Min. :0 Min. : 0.00 Min. : 0.00
1st Qu.:0 1st Qu.:0 1st Qu.:0 1st Qu.:0 1st Qu.: 0.00 1st Qu.: 0.00
Median :0 Median :0 Median :0 Median :0 Median : 11.64 Median : 11.09
Mean :0 Mean :0 Mean :0 Mean :0 Mean : 168.26 Mean : 175.22
std_og_mou_8 std_og_mou_9 isd_og_mou_6 isd_og_mou_7 isd_og_mou_8
Min. : 0.00 Min. : 0.00 Min. : 0.000 Min. : 0.000 Min. : 0.000
1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.000
Median : 10.41 Median : 8.41 Median : 0.000 Median : 0.000 Median : 0.000
Mean : 174.19 Mean : 169.66 Mean : 0.798 Mean : 0.777 Mean : 0.791
isd_og_mou_9 spl_og_mou_6 spl_og_mou_7 spl_og_mou_8 spl_og_mou_9
Min. : 0.000 Min. : 0.000 Min. : 0.000 Min. : 0.000 Min. : 0.000
1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.000
Median : 0.000 Median : 0.000 Median : 0.000 Median : 0.000 Median : 0.000
Mean : 0.724 Mean : 3.917 Mean : 4.978 Mean : 5.054 Mean : 4.413
og_others_6 og_others_7 og_others_8 og_others_9 total_og_mou_6
Min. : 0.000 Min. : 0.00 Min. : 0.000 Min. : 0.000 Min. : 0.00
1st Qu.: 0.000 1st Qu.: 0.00 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 44.74
Median : 0.000 Median : 0.00 Median : 0.000 Median : 0.000 Median : 145.14
Mean : 0.454 Mean : 0.03 Mean : 0.033 Mean : 0.047 Mean : 305.13
total_og_mou_7 total_og_mou_8 total_og_mou_9 loc_ic_t2t_mou_6 loc_ic_t2t_mou_7
Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.00
1st Qu.: 43.01 1st Qu.: 38.58 1st Qu.: 25.51 1st Qu.: 2.99 1st Qu.: 3.23
Median : 141.53 Median : 138.61 Median : 125.46 Median : 15.69 Median : 15.74
Mean : 310.23 Mean : 304.12 Mean : 289.28 Mean : 47.92 Mean : 47.99
loc_ic_t2t_mou_8 loc_ic_t2t_mou_9 loc_ic_t2m_mou_6 loc_ic_t2m_mou_7 loc_ic_t2m_mou_8
Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.00
1st Qu.: 3.28 1st Qu.: 3.29 1st Qu.: 17.29 1st Qu.: 18.59 1st Qu.: 18.93
Median : 16.03 Median : 15.66 Median : 56.49 Median : 57.08 Median : 58.24
Mean : 47.21 Mean : 46.28 Mean : 107.48 Mean : 107.12 Mean : 108.46
loc_ic_t2m_mou_9 loc_ic_t2f_mou_6 loc_ic_t2f_mou_7 loc_ic_t2f_mou_8 loc_ic_t2f_mou_9
Min. : 0.00 Min. : 0.00 Min. : 0.000 Min. : 0.00 Min. : 0.00
1st Qu.: 18.56 1st Qu.: 0.00 1st Qu.: 0.000 1st Qu.: 0.00 1st Qu.: 0.00
Median : 56.61 Median : 0.88 Median : 0.930 Median : 0.93 Median : 0.96
Mean : 106.16 Mean : 12.08 Mean : 12.600 Mean : 11.75 Mean : 12.17
loc_ic_mou_6 loc_ic_mou_7 loc_ic_mou_8 loc_ic_mou_9 std_ic_t2t_mou_6
Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.000
1st Qu.: 30.39 1st Qu.: 32.46 1st Qu.: 32.74 1st Qu.: 32.29 1st Qu.: 0.000
Median : 92.16 Median : 92.55 Median : 93.83 Median : 91.64 Median : 0.000
Mean : 167.49 Mean : 167.72 Mean : 167.43 Mean : 164.62 Mean : 9.576
std_ic_t2t_mou_7 std_ic_t2t_mou_8 std_ic_t2t_mou_9 std_ic_t2m_mou_6 std_ic_t2m_mou_7
Min. : 0.00 Min. : 0.000 Min. : 0.000 Min. : 0.00 Min. : 0.00
1st Qu.: 0.00 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.00 1st Qu.: 0.00
Median : 0.00 Median : 0.000 Median : 0.000 Median : 2.03 Median : 2.04
Mean : 10.01 Mean : 9.884 Mean : 9.432 Mean : 20.72 Mean : 21.66
std_ic_t2m_mou_8 std_ic_t2m_mou_9 std_ic_t2f_mou_6 std_ic_t2f_mou_7 std_ic_t2f_mou_8
Min. : 0.00 Min. : 0.00 Min. : 0.000 Min. : 0.000 Min. : 0.000
1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.000
Median : 2.03 Median : 1.74 Median : 0.000 Median : 0.000 Median : 0.000
Mean : 21.18 Mean : 19.62 Mean : 2.156 Mean : 2.217 Mean : 2.085
std_ic_t2f_mou_9 std_ic_t2o_mou_6 std_ic_t2o_mou_7 std_ic_t2o_mou_8 std_ic_t2o_mou_9 std_ic_mou_6
Min. : 0.000 Min. :0 Min. :0 Min. :0 Min. :0 Min. : 0.00
1st Qu.: 0.000 1st Qu.:0 1st Qu.:0 1st Qu.:0 1st Qu.:0 1st Qu.: 0.00
Median : 0.000 Median :0 Median :0 Median :0 Median :0 Median : 5.89
Mean : 2.173 Mean :0 Mean :0 Mean :0 Mean :0 Mean : 32.46
std_ic_mou_7 std_ic_mou_8 std_ic_mou_9 total_ic_mou_6 total_ic_mou_7
Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.00
1st Qu.: 0.00 1st Qu.: 0.01 1st Qu.: 0.00 1st Qu.: 38.53 1st Qu.: 41.19
Median : 5.96 Median : 5.88 Median : 5.38 Median : 114.74 Median : 116.34
Mean : 33.89 Mean : 33.16 Mean : 31.23 Mean : 200.13 Mean : 202.85
total_ic_mou_8 total_ic_mou_9 spl_ic_mou_6 spl_ic_mou_7 spl_ic_mou_8 spl_ic_mou_9
Min. : 0.00 Min. : 0.00 Min. : 0.000 Min. : 0.000 Min. : 0.00 Min. : 0.000
1st Qu.: 38.29 1st Qu.: 32.37 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.00 1st Qu.: 0.000
Median : 114.66 Median : 105.89 Median : 0.000 Median : 0.000 Median : 0.00 Median : 0.000
Mean : 198.75 Mean : 189.21 Mean : 0.062 Mean : 0.034 Mean : 0.04 Mean : 0.163
isd_ic_mou_6 isd_ic_mou_7 isd_ic_mou_8 isd_ic_mou_9 ic_others_6
Min. : 0.000 Min. : 0.000 Min. : 0.000 Min. : 0.000 Min. : 0.000
1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.000
Median : 0.000 Median : 0.000 Median : 0.000 Median : 0.000 Median : 0.000
Mean : 7.461 Mean : 8.335 Mean : 8.442 Mean : 8.063 Mean : 0.855
ic_others_7 ic_others_8 ic_others_9 total_rech_num_6 total_rech_num_7
Min. : 0.000 Min. : 0.000 Min. : 0.000 Min. : 0.000 Min. : 0.0
1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 3.000 1st Qu.: 3.0
Median : 0.000 Median : 0.000 Median : 0.000 Median : 6.000 Median : 6.0
Mean : 1.013 Mean : 0.971 Mean : 1.017 Mean : 7.559 Mean : 7.7
total_rech_num_8 total_rech_num_9 total_rech_amt_6 total_rech_amt_7 total_rech_amt_8
Min. : 0.000 Min. : 0.000 Min. : 0.0 Min. : 0 Min. : 0.0
1st Qu.: 3.000 1st Qu.: 3.000 1st Qu.: 109.0 1st Qu.: 100 1st Qu.: 90.0
Median : 5.000 Median : 5.000 Median : 230.0 Median : 220 Median : 225.0
Mean : 7.213 Mean : 6.893 Mean : 327.5 Mean : 323 Mean : 324.2
total_rech_amt_9 max_rech_amt_6 max_rech_amt_7 max_rech_amt_8 max_rech_amt_9 date_of_last_rech_6
Min. : 0.0 Min. : 0.0 Min. : 0.0 Min. : 0.0 Min. : 0.0 6/30/2014:16960
1st Qu.: 52.0 1st Qu.: 30.0 1st Qu.: 30.0 1st Qu.: 30.0 1st Qu.: 28.0 6/29/2014:12918
Median : 200.0 Median : 110.0 Median : 110.0 Median : 98.0 Median : 61.0 6/27/2014:11169
Mean : 303.4 Mean : 104.6 Mean : 104.8 Mean : 107.7 Mean : 101.9 6/28/2014: 9491
date_of_last_rech_7 date_of_last_rech_8 date_of_last_rech_9 last_day_rch_amt_6 last_day_rch_amt_7
7/31/2014:17288 8/31/2014:14706 9/29/2014:22623 Min. : 0.00 Min. : 0.00
7/30/2014:13863 8/30/2014:11707 9/30/2014:21713 1st Qu.: 0.00 1st Qu.: 0.00
7/25/2014: 9401 8/29/2014:10057 9/28/2014:10601 Median : 30.00 Median : 30.00
7/29/2014: 9052 8/28/2014: 9816 9/27/2014: 6473 Mean : 63.16 Mean : 59.39
last_day_rch_amt_8 last_day_rch_amt_9 date_of_last_rech_data_6 date_of_last_rech_data_7
Min. : 0.00 Min. : 0.0 :74846 :74428
1st Qu.: 0.00 1st Qu.: 0.0 6/30/2014: 1888 7/31/2014: 1813
Median : 30.00 Median : 0.0 6/29/2014: 1651 7/29/2014: 1719
Mean : 62.64 Mean : 43.9 6/28/2014: 1643 7/30/2014: 1665
date_of_last_rech_data_8 date_of_last_rech_data_9 total_rech_data_6 total_rech_data_7 total_rech_data_8
:73660 :74077 Min. : 1.00 Min. : 1.00 Min. : 1.00
8/31/2014: 1998 9/29/2014: 2329 1st Qu.: 1.00 1st Qu.: 1.00 1st Qu.: 1.00
8/30/2014: 1867 9/27/2014: 1810 Median : 1.00 Median : 1.00 Median : 1.00
8/29/2014: 1605 9/30/2014: 1425 Mean : 2.46 Mean : 2.67 Mean : 2.65
total_rech_data_9 max_rech_data_6 max_rech_data_7 max_rech_data_8 max_rech_data_9 count_rech_2g_6
Min. : 1.00 Min. : 1.0 Min. : 1.0 Min. : 1.0 Min. : 1.0 Min. : 0.00
1st Qu.: 1.00 1st Qu.: 25.0 1st Qu.: 25.0 1st Qu.: 25.0 1st Qu.: 25.0 1st Qu.: 1.00
Median : 2.00 Median : 145.0 Median : 145.0 Median : 145.0 Median : 145.0 Median : 1.00
Mean : 2.44 Mean : 126.4 Mean : 126.7 Mean : 125.7 Mean : 124.9 Mean : 1.86
count_rech_2g_7 count_rech_2g_8 count_rech_2g_9 count_rech_3g_6 count_rech_3g_7 count_rech_3g_8
Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.0 Min. : 0.00 Min. : 0.00
1st Qu.: 1.00 1st Qu.: 1.00 1st Qu.: 1.00 1st Qu.: 0.0 1st Qu.: 0.00 1st Qu.: 0.00
Median : 1.00 Median : 1.00 Median : 1.00 Median : 0.0 Median : 0.00 Median : 0.00
Mean : 2.04 Mean : 2.02 Mean : 1.78 Mean : 0.6 Mean : 0.62 Mean : 0.64
count_rech_3g_9 av_rech_amt_data_6 av_rech_amt_data_7 av_rech_amt_data_8 av_rech_amt_data_9
Min. : 0.00 Min. : 1.0 Min. : 0.5 Min. : 0.5 Min. : 1.0
1st Qu.: 0.00 1st Qu.: 82.0 1st Qu.: 92.0 1st Qu.: 87.0 1st Qu.: 69.0
Median : 0.00 Median : 154.0 Median : 154.0 Median : 154.0 Median : 164.0
Mean : 0.66 Mean : 192.6 Mean : 201.0 Mean : 197.5 Mean : 192.7
vol_2g_mb_6 vol_2g_mb_7 vol_2g_mb_8 vol_2g_mb_9 vol_3g_mb_6
Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.0
1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.0
Median : 0.00 Median : 0.00 Median : 0.00 Median : 0.00 Median : 0.0
Mean : 51.91 Mean : 51.23 Mean : 50.17 Mean : 44.72 Mean : 121.4
vol_3g_mb_7 vol_3g_mb_8 vol_3g_mb_9 arpu_3g_6 arpu_3g_7 arpu_3g_8
Min. : 0 Min. : 0.0 Min. : 0.0 Min. : -30.82 Min. : -26.04 Min. : -24.49
1st Qu.: 0 1st Qu.: 0.0 1st Qu.: 0.0 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.00
Median : 0 Median : 0.0 Median : 0.0 Median : 0.48 Median : 0.42 Median : 0.88
Mean : 129 Mean : 135.4 Mean : 136.1 Mean : 89.56 Mean : 89.38 Mean : 91.17
arpu_3g_9 arpu_2g_6 arpu_2g_7 arpu_2g_8 arpu_2g_9
Min. : -71.09 Min. : -35.83 Min. : -15.48 Min. : -55.83 Min. : -45.74
1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.00
Median : 2.60 Median : 10.83 Median : 8.81 Median : 9.27 Median : 14.80
Mean : 100.26 Mean : 86.40 Mean : 85.91 Mean : 86.60 Mean : 93.71
night_pck_user_6 night_pck_user_7 night_pck_user_8 night_pck_user_9 monthly_2g_6 monthly_2g_7
Min. :0.00 Min. :0.00 Min. :0.00 Min. :0.00 Min. :0.00000 Min. :0.00000
1st Qu.:0.00 1st Qu.:0.00 1st Qu.:0.00 1st Qu.:0.00 1st Qu.:0.00000 1st Qu.:0.00000
Median :0.00 Median :0.00 Median :0.00 Median :0.00 Median :0.00000 Median :0.00000
Mean :0.03 Mean :0.02 Mean :0.02 Mean :0.02 Mean :0.07964 Mean :0.08322
monthly_2g_8 monthly_2g_9 sachet_2g_6 sachet_2g_7 sachet_2g_8 sachet_2g_9
Min. :0.000 Min. :0.00000 Min. : 0.0000 Min. : 0.0000 Min. : 0.0000 Min. : 0.0000
1st Qu.:0.000 1st Qu.:0.00000 1st Qu.: 0.0000 1st Qu.: 0.0000 1st Qu.: 0.0000 1st Qu.: 0.0000
Median :0.000 Median :0.00000 Median : 0.0000 Median : 0.0000 Median : 0.0000 Median : 0.0000
Mean :0.081 Mean :0.06878 Mean : 0.3894 Mean : 0.4396 Mean : 0.4501 Mean : 0.3931
monthly_3g_6 monthly_3g_7 monthly_3g_8 monthly_3g_9 sachet_3g_6
Min. : 0.00000 Min. : 0.00000 Min. : 0.00000 Min. : 0.00000 Min. : 0.00000
1st Qu.: 0.00000 1st Qu.: 0.00000 1st Qu.: 0.00000 1st Qu.: 0.00000 1st Qu.: 0.00000
Median : 0.00000 Median : 0.00000 Median : 0.00000 Median : 0.00000 Median : 0.00000
Mean : 0.07592 Mean : 0.07858 Mean : 0.08294 Mean : 0.08634 Mean : 0.07478
sachet_3g_7 sachet_3g_8 sachet_3g_9 fb_user_6 fb_user_7 fb_user_8
Min. : 0.0000 Min. : 0.0000 Min. : 0.00000 Min. :0.00 Min. :0.00 Min. :0.00
1st Qu.: 0.0000 1st Qu.: 0.0000 1st Qu.: 0.00000 1st Qu.:1.00 1st Qu.:1.00 1st Qu.:1.00
Median : 0.0000 Median : 0.0000 Median : 0.00000 Median :1.00 Median :1.00 Median :1.00
Mean : 0.0804 Mean : 0.0845 Mean : 0.08458 Mean :0.91 Mean :0.91 Mean :0.89
fb_user_9 aon aug_vbc_3g jul_vbc_3g jun_vbc_3g sep_vbc_3g
Min. :0.00 Min. : 180 Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.000
1st Qu.:1.00 1st Qu.: 467 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.000
Median :1.00 Median : 863 Median : 0.00 Median : 0.00 Median : 0.00 Median : 0.000
Mean :0.86 Mean :1220 Mean : 68.17 Mean : 66.84 Mean : 60.02 Mean : 3.299
[ reached getOption("max.print") -- omitted 3 rows ]
Let us find the columns whose column sums are zero. These columns have to be eliminated. To do that, let us first select the numeric columns
library(dplyr)
# Names of the numeric columns of churnData. vapply() always yields a logical
# vector with one entry per column, whatever the number of columns.
numCols <- names(churnData)[vapply(churnData, is.numeric, logical(1))]
# Numeric-only subset; drop = FALSE keeps a data frame even if only a single
# column qualifies.
numChurn <- churnData[, numCols, drop = FALSE]
# Preview the first rows of the numeric subset.
head(numChurn)
Now let us find those columns whose sums equal 0 and then find those column names
# Names of the numeric columns whose values sum to zero. na.rm = TRUE skips
# missing values, so a column that contains NA yields a real sum instead of NA.
# NOTE(review): this definition was reconstructed from the surrounding
# description; a column that is entirely NA also sums to 0 here -- confirm
# that dropping such columns is intended.
zeroCols <- names(numChurn)[colSums(numChurn, na.rm = TRUE) == 0]
zeroCols
[1] "loc_og_t2o_mou" "std_og_t2o_mou" "loc_ic_t2o_mou" "std_og_t2c_mou_6" "std_og_t2c_mou_7"
[6] "std_og_t2c_mou_8" "std_og_t2c_mou_9" "std_ic_t2o_mou_6" "std_ic_t2o_mou_7" "std_ic_t2o_mou_8"
[11] "std_ic_t2o_mou_9"
Let's eliminate these columns from the data set
# Dimensions (rows, columns) of the numeric subset before any column is removed.
dim(numChurn)
[1] 99999 214
# Drop the zero-sum columns. all_of() marks zeroCols as an external character
# vector of column names, so it cannot be mistaken for a column that happens
# to be called "zeroCols", and it raises an error if a listed column is absent.
numChurn <- numChurn %>% select(-all_of(zeroCols))
# Dimensions after the drop, for comparison with the earlier dim() output.
dim(numChurn)
[1] 99999 203
# Preview the first rows of numChurn as it stands at this point.
head(numChurn)
# Print a per-column summary of numChurn.
summary(numChurn)
mobile_number circle_id arpu_6 arpu_7 arpu_8
Min. :7.000e+09 Min. :109 Min. :-2258.71 Min. :-2014.05 Min. : -945.81
1st Qu.:7.001e+09 1st Qu.:109 1st Qu.: 93.41 1st Qu.: 86.98 1st Qu.: 84.13
Median :7.001e+09 Median :109 Median : 197.70 Median : 191.64 Median : 192.08
Mean :7.001e+09 Mean :109 Mean : 282.99 Mean : 278.54 Mean : 279.15
arpu_9 onnet_mou_6 onnet_mou_7 onnet_mou_8 onnet_mou_9
Min. :-1899.51 Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.00
1st Qu.: 62.69 1st Qu.: 7.38 1st Qu.: 6.66 1st Qu.: 6.46 1st Qu.: 5.33
Median : 176.85 Median : 34.31 Median : 32.33 Median : 32.36 Median : 29.84
Mean : 261.65 Mean : 132.40 Mean : 133.67 Mean : 133.02 Mean : 130.30
offnet_mou_6 offnet_mou_7 offnet_mou_8 offnet_mou_9 roam_ic_mou_6
Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.00
1st Qu.: 34.73 1st Qu.: 32.19 1st Qu.: 31.63 1st Qu.: 27.13 1st Qu.: 0.00
Median : 96.31 Median : 91.73 Median : 92.14 Median : 87.29 Median : 0.00
Mean : 197.94 Mean : 197.04 Mean : 196.57 Mean : 190.34 Mean : 9.95
roam_ic_mou_7 roam_ic_mou_8 roam_ic_mou_9 roam_og_mou_6 roam_og_mou_7
Min. : 0.00 Min. : 0.000 Min. : 0.000 Min. : 0.00 Min. : 0.000
1st Qu.: 0.00 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.00 1st Qu.: 0.000
Median : 0.00 Median : 0.000 Median : 0.000 Median : 0.00 Median : 0.000
Mean : 7.15 Mean : 7.293 Mean : 6.344 Mean : 13.91 Mean : 9.819
roam_og_mou_8 roam_og_mou_9 loc_og_t2t_mou_6 loc_og_t2t_mou_7 loc_og_t2t_mou_8
Min. : 0.000 Min. : 0.000 Min. : 0.00 Min. : 0.00 Min. : 0.00
1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 1.66 1st Qu.: 1.63 1st Qu.: 1.60
Median : 0.000 Median : 0.000 Median : 11.91 Median : 11.61 Median : 11.73
Mean : 9.972 Mean : 8.556 Mean : 47.10 Mean : 46.47 Mean : 45.89
loc_og_t2t_mou_9 loc_og_t2m_mou_6 loc_og_t2m_mou_7 loc_og_t2m_mou_8 loc_og_t2m_mou_9
Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.00
1st Qu.: 1.36 1st Qu.: 9.88 1st Qu.: 10.03 1st Qu.: 9.81 1st Qu.: 8.81
Median : 11.26 Median : 41.03 Median : 40.43 Median : 40.36 Median : 39.12
Mean : 44.58 Mean : 93.34 Mean : 91.40 Mean : 91.75 Mean : 90.46
loc_og_t2f_mou_6 loc_og_t2f_mou_7 loc_og_t2f_mou_8 loc_og_t2f_mou_9 loc_og_t2c_mou_6
Min. : 0.000 Min. : 0.000 Min. : 0.000 Min. : 0.000 Min. : 0.000
1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.000
Median : 0.000 Median : 0.000 Median : 0.000 Median : 0.000 Median : 0.000
Mean : 3.751 Mean : 3.793 Mean : 3.678 Mean : 3.655 Mean : 1.123
loc_og_t2c_mou_7 loc_og_t2c_mou_8 loc_og_t2c_mou_9 loc_og_mou_6 loc_og_mou_7
Min. : 0.000 Min. : 0.000 Min. : 0.000 Min. : 0.00 Min. : 0.00
1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 17.11 1st Qu.: 17.48
Median : 0.000 Median : 0.000 Median : 0.000 Median : 65.11 Median : 63.69
Mean : 1.369 Mean : 1.434 Mean : 1.233 Mean : 144.20 Mean : 141.67
loc_og_mou_8 loc_og_mou_9 std_og_t2t_mou_6 std_og_t2t_mou_7 std_og_t2t_mou_8
Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.00
1st Qu.: 17.11 1st Qu.: 15.56 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.00
Median : 63.73 Median : 61.84 Median : 0.00 Median : 0.00 Median : 0.00
Mean : 141.33 Mean : 138.71 Mean : 79.83 Mean : 83.30 Mean : 83.28
std_og_t2t_mou_9 std_og_t2m_mou_6 std_og_t2m_mou_7 std_og_t2m_mou_8 std_og_t2m_mou_9
Min. : 0.00 Min. : 0.00 Min. : 0.000 Min. : 0.00 Min. : 0.00
1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.000 1st Qu.: 0.00 1st Qu.: 0.00
Median : 0.00 Median : 3.95 Median : 3.635 Median : 3.31 Median : 2.50
Mean : 82.34 Mean : 87.30 Mean : 90.804 Mean : 89.84 Mean : 86.28
std_og_t2f_mou_6 std_og_t2f_mou_7 std_og_t2f_mou_8 std_og_t2f_mou_9 std_og_mou_6
Min. : 0.000 Min. : 0.000 Min. : 0.000 Min. : 0.000 Min. : 0.00
1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.00
Median : 0.000 Median : 0.000 Median : 0.000 Median : 0.000 Median : 11.64
Mean : 1.129 Mean : 1.115 Mean : 1.068 Mean : 1.042 Mean : 168.26
std_og_mou_7 std_og_mou_8 std_og_mou_9 isd_og_mou_6 isd_og_mou_7
Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.000 Min. : 0.000
1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.000 1st Qu.: 0.000
Median : 11.09 Median : 10.41 Median : 8.41 Median : 0.000 Median : 0.000
Mean : 175.22 Mean : 174.19 Mean : 169.66 Mean : 0.798 Mean : 0.777
isd_og_mou_8 isd_og_mou_9 spl_og_mou_6 spl_og_mou_7 spl_og_mou_8
Min. : 0.000 Min. : 0.000 Min. : 0.000 Min. : 0.000 Min. : 0.000
1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.000
Median : 0.000 Median : 0.000 Median : 0.000 Median : 0.000 Median : 0.000
Mean : 0.791 Mean : 0.724 Mean : 3.917 Mean : 4.978 Mean : 5.054
spl_og_mou_9 og_others_6 og_others_7 og_others_8 og_others_9
Min. : 0.000 Min. : 0.000 Min. : 0.00 Min. : 0.000 Min. : 0.000
1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.00 1st Qu.: 0.000 1st Qu.: 0.000
Median : 0.000 Median : 0.000 Median : 0.00 Median : 0.000 Median : 0.000
Mean : 4.413 Mean : 0.454 Mean : 0.03 Mean : 0.033 Mean : 0.047
total_og_mou_6 total_og_mou_7 total_og_mou_8 total_og_mou_9 loc_ic_t2t_mou_6
Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.00
1st Qu.: 44.74 1st Qu.: 43.01 1st Qu.: 38.58 1st Qu.: 25.51 1st Qu.: 2.99
Median : 145.14 Median : 141.53 Median : 138.61 Median : 125.46 Median : 15.69
Mean : 305.13 Mean : 310.23 Mean : 304.12 Mean : 289.28 Mean : 47.92
loc_ic_t2t_mou_7 loc_ic_t2t_mou_8 loc_ic_t2t_mou_9 loc_ic_t2m_mou_6 loc_ic_t2m_mou_7
Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.00
1st Qu.: 3.23 1st Qu.: 3.28 1st Qu.: 3.29 1st Qu.: 17.29 1st Qu.: 18.59
Median : 15.74 Median : 16.03 Median : 15.66 Median : 56.49 Median : 57.08
Mean : 47.99 Mean : 47.21 Mean : 46.28 Mean : 107.48 Mean : 107.12
loc_ic_t2m_mou_8 loc_ic_t2m_mou_9 loc_ic_t2f_mou_6 loc_ic_t2f_mou_7 loc_ic_t2f_mou_8
Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.000 Min. : 0.00
1st Qu.: 18.93 1st Qu.: 18.56 1st Qu.: 0.00 1st Qu.: 0.000 1st Qu.: 0.00
Median : 58.24 Median : 56.61 Median : 0.88 Median : 0.930 Median : 0.93
Mean : 108.46 Mean : 106.16 Mean : 12.08 Mean : 12.600 Mean : 11.75
loc_ic_t2f_mou_9 loc_ic_mou_6 loc_ic_mou_7 loc_ic_mou_8 loc_ic_mou_9
Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.00
1st Qu.: 0.00 1st Qu.: 30.39 1st Qu.: 32.46 1st Qu.: 32.74 1st Qu.: 32.29
Median : 0.96 Median : 92.16 Median : 92.55 Median : 93.83 Median : 91.64
Mean : 12.17 Mean : 167.49 Mean : 167.72 Mean : 167.43 Mean : 164.62
std_ic_t2t_mou_6 std_ic_t2t_mou_7 std_ic_t2t_mou_8 std_ic_t2t_mou_9 std_ic_t2m_mou_6
Min. : 0.000 Min. : 0.00 Min. : 0.000 Min. : 0.000 Min. : 0.00
1st Qu.: 0.000 1st Qu.: 0.00 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.00
Median : 0.000 Median : 0.00 Median : 0.000 Median : 0.000 Median : 2.03
Mean : 9.576 Mean : 10.01 Mean : 9.884 Mean : 9.432 Mean : 20.72
std_ic_t2m_mou_7 std_ic_t2m_mou_8 std_ic_t2m_mou_9 std_ic_t2f_mou_6 std_ic_t2f_mou_7
Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.000 Min. : 0.000
1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.000 1st Qu.: 0.000
Median : 2.04 Median : 2.03 Median : 1.74 Median : 0.000 Median : 0.000
Mean : 21.66 Mean : 21.18 Mean : 19.62 Mean : 2.156 Mean : 2.217
std_ic_t2f_mou_8 std_ic_t2f_mou_9 std_ic_mou_6 std_ic_mou_7 std_ic_mou_8
Min. : 0.000 Min. : 0.000 Min. : 0.00 Min. : 0.00 Min. : 0.00
1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.01
Median : 0.000 Median : 0.000 Median : 5.89 Median : 5.96 Median : 5.88
Mean : 2.085 Mean : 2.173 Mean : 32.46 Mean : 33.89 Mean : 33.16
std_ic_mou_9 total_ic_mou_6 total_ic_mou_7 total_ic_mou_8 total_ic_mou_9
Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.00
1st Qu.: 0.00 1st Qu.: 38.53 1st Qu.: 41.19 1st Qu.: 38.29 1st Qu.: 32.37
Median : 5.38 Median : 114.74 Median : 116.34 Median : 114.66 Median : 105.89
Mean : 31.23 Mean : 200.13 Mean : 202.85 Mean : 198.75 Mean : 189.21
spl_ic_mou_6 spl_ic_mou_7 spl_ic_mou_8 spl_ic_mou_9 isd_ic_mou_6 isd_ic_mou_7
Min. : 0.000 Min. : 0.000 Min. : 0.00 Min. : 0.000 Min. : 0.000 Min. : 0.000
1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.00 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.000
Median : 0.000 Median : 0.000 Median : 0.00 Median : 0.000 Median : 0.000 Median : 0.000
Mean : 0.062 Mean : 0.034 Mean : 0.04 Mean : 0.163 Mean : 7.461 Mean : 8.335
isd_ic_mou_8 isd_ic_mou_9 ic_others_6 ic_others_7 ic_others_8
Min. : 0.000 Min. : 0.000 Min. : 0.000 Min. : 0.000 Min. : 0.000
1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.000
Median : 0.000 Median : 0.000 Median : 0.000 Median : 0.000 Median : 0.000
Mean : 8.442 Mean : 8.063 Mean : 0.855 Mean : 1.013 Mean : 0.971
ic_others_9 total_rech_num_6 total_rech_num_7 total_rech_num_8 total_rech_num_9
Min. : 0.000 Min. : 0.000 Min. : 0.0 Min. : 0.000 Min. : 0.000
1st Qu.: 0.000 1st Qu.: 3.000 1st Qu.: 3.0 1st Qu.: 3.000 1st Qu.: 3.000
Median : 0.000 Median : 6.000 Median : 6.0 Median : 5.000 Median : 5.000
Mean : 1.017 Mean : 7.559 Mean : 7.7 Mean : 7.213 Mean : 6.893
total_rech_amt_6 total_rech_amt_7 total_rech_amt_8 total_rech_amt_9 max_rech_amt_6 max_rech_amt_7
Min. : 0.0 Min. : 0 Min. : 0.0 Min. : 0.0 Min. : 0.0 Min. : 0.0
1st Qu.: 109.0 1st Qu.: 100 1st Qu.: 90.0 1st Qu.: 52.0 1st Qu.: 30.0 1st Qu.: 30.0
Median : 230.0 Median : 220 Median : 225.0 Median : 200.0 Median : 110.0 Median : 110.0
Mean : 327.5 Mean : 323 Mean : 324.2 Mean : 303.4 Mean : 104.6 Mean : 104.8
max_rech_amt_8 max_rech_amt_9 last_day_rch_amt_6 last_day_rch_amt_7 last_day_rch_amt_8
Min. : 0.0 Min. : 0.0 Min. : 0.00 Min. : 0.00 Min. : 0.00
1st Qu.: 30.0 1st Qu.: 28.0 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.00
Median : 98.0 Median : 61.0 Median : 30.00 Median : 30.00 Median : 30.00
Mean : 107.7 Mean : 101.9 Mean : 63.16 Mean : 59.39 Mean : 62.64
last_day_rch_amt_9 total_rech_data_6 total_rech_data_7 total_rech_data_8 total_rech_data_9
Min. : 0.0 Min. : 1.00 Min. : 1.00 Min. : 1.00 Min. : 1.00
1st Qu.: 0.0 1st Qu.: 1.00 1st Qu.: 1.00 1st Qu.: 1.00 1st Qu.: 1.00
Median : 0.0 Median : 1.00 Median : 1.00 Median : 1.00 Median : 2.00
Mean : 43.9 Mean : 2.46 Mean : 2.67 Mean : 2.65 Mean : 2.44
max_rech_data_6 max_rech_data_7 max_rech_data_8 max_rech_data_9 count_rech_2g_6 count_rech_2g_7
Min. : 1.0 Min. : 1.0 Min. : 1.0 Min. : 1.0 Min. : 0.00 Min. : 0.00
1st Qu.: 25.0 1st Qu.: 25.0 1st Qu.: 25.0 1st Qu.: 25.0 1st Qu.: 1.00 1st Qu.: 1.00
Median : 145.0 Median : 145.0 Median : 145.0 Median : 145.0 Median : 1.00 Median : 1.00
Mean : 126.4 Mean : 126.7 Mean : 125.7 Mean : 124.9 Mean : 1.86 Mean : 2.04
count_rech_2g_8 count_rech_2g_9 count_rech_3g_6 count_rech_3g_7 count_rech_3g_8 count_rech_3g_9
Min. : 0.00 Min. : 0.00 Min. : 0.0 Min. : 0.00 Min. : 0.00 Min. : 0.00
1st Qu.: 1.00 1st Qu.: 1.00 1st Qu.: 0.0 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.00
Median : 1.00 Median : 1.00 Median : 0.0 Median : 0.00 Median : 0.00 Median : 0.00
Mean : 2.02 Mean : 1.78 Mean : 0.6 Mean : 0.62 Mean : 0.64 Mean : 0.66
av_rech_amt_data_6 av_rech_amt_data_7 av_rech_amt_data_8 av_rech_amt_data_9 vol_2g_mb_6
Min. : 1.0 Min. : 0.5 Min. : 0.5 Min. : 1.0 Min. : 0.00
1st Qu.: 82.0 1st Qu.: 92.0 1st Qu.: 87.0 1st Qu.: 69.0 1st Qu.: 0.00
Median : 154.0 Median : 154.0 Median : 154.0 Median : 164.0 Median : 0.00
Mean : 192.6 Mean : 201.0 Mean : 197.5 Mean : 192.7 Mean : 51.91
vol_2g_mb_7 vol_2g_mb_8 vol_2g_mb_9 vol_3g_mb_6 vol_3g_mb_7
Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.0 Min. : 0
1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.0 1st Qu.: 0
Median : 0.00 Median : 0.00 Median : 0.00 Median : 0.0 Median : 0
Mean : 51.23 Mean : 50.17 Mean : 44.72 Mean : 121.4 Mean : 129
vol_3g_mb_8 vol_3g_mb_9 arpu_3g_6 arpu_3g_7 arpu_3g_8
Min. : 0.0 Min. : 0.0 Min. : -30.82 Min. : -26.04 Min. : -24.49
1st Qu.: 0.0 1st Qu.: 0.0 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.00
Median : 0.0 Median : 0.0 Median : 0.48 Median : 0.42 Median : 0.88
Mean : 135.4 Mean : 136.1 Mean : 89.56 Mean : 89.38 Mean : 91.17
arpu_3g_9 arpu_2g_6 arpu_2g_7 arpu_2g_8 arpu_2g_9
Min. : -71.09 Min. : -35.83 Min. : -15.48 Min. : -55.83 Min. : -45.74
1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.00
Median : 2.60 Median : 10.83 Median : 8.81 Median : 9.27 Median : 14.80
Mean : 100.26 Mean : 86.40 Mean : 85.91 Mean : 86.60 Mean : 93.71
night_pck_user_6 night_pck_user_7 night_pck_user_8 night_pck_user_9 monthly_2g_6 monthly_2g_7
Min. :0.00 Min. :0.00 Min. :0.00 Min. :0.00 Min. :0.00000 Min. :0.00000
1st Qu.:0.00 1st Qu.:0.00 1st Qu.:0.00 1st Qu.:0.00 1st Qu.:0.00000 1st Qu.:0.00000
Median :0.00 Median :0.00 Median :0.00 Median :0.00 Median :0.00000 Median :0.00000
Mean :0.03 Mean :0.02 Mean :0.02 Mean :0.02 Mean :0.07964 Mean :0.08322
monthly_2g_8 monthly_2g_9 sachet_2g_6 sachet_2g_7 sachet_2g_8 sachet_2g_9
Min. :0.000 Min. :0.00000 Min. : 0.0000 Min. : 0.0000 Min. : 0.0000 Min. : 0.0000
1st Qu.:0.000 1st Qu.:0.00000 1st Qu.: 0.0000 1st Qu.: 0.0000 1st Qu.: 0.0000 1st Qu.: 0.0000
Median :0.000 Median :0.00000 Median : 0.0000 Median : 0.0000 Median : 0.0000 Median : 0.0000
Mean :0.081 Mean :0.06878 Mean : 0.3894 Mean : 0.4396 Mean : 0.4501 Mean : 0.3931
monthly_3g_6 monthly_3g_7 monthly_3g_8 monthly_3g_9 sachet_3g_6
Min. : 0.00000 Min. : 0.00000 Min. : 0.00000 Min. : 0.00000 Min. : 0.00000
1st Qu.: 0.00000 1st Qu.: 0.00000 1st Qu.: 0.00000 1st Qu.: 0.00000 1st Qu.: 0.00000
Median : 0.00000 Median : 0.00000 Median : 0.00000 Median : 0.00000 Median : 0.00000
Mean : 0.07592 Mean : 0.07858 Mean : 0.08294 Mean : 0.08634 Mean : 0.07478
sachet_3g_7 sachet_3g_8 sachet_3g_9 fb_user_6 fb_user_7 fb_user_8
Min. : 0.0000 Min. : 0.0000 Min. : 0.00000 Min. :0.00 Min. :0.00 Min. :0.00
1st Qu.: 0.0000 1st Qu.: 0.0000 1st Qu.: 0.00000 1st Qu.:1.00 1st Qu.:1.00 1st Qu.:1.00
Median : 0.0000 Median : 0.0000 Median : 0.00000 Median :1.00 Median :1.00 Median :1.00
Mean : 0.0804 Mean : 0.0845 Mean : 0.08458 Mean :0.91 Mean :0.91 Mean :0.89
fb_user_9 aon aug_vbc_3g jul_vbc_3g jun_vbc_3g sep_vbc_3g
Min. :0.00 Min. : 180 Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.000
1st Qu.:1.00 1st Qu.: 467 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.000
Median :1.00 Median : 863 Median : 0.00 Median : 0.00 Median : 0.00 Median : 0.000
Mean :0.86 Mean :1220 Mean : 68.17 Mean : 66.84 Mean : 60.02 Mean : 3.299
[ reached getOption("max.print") -- omitted 3 rows ]
Let us look at duplicates in the mobile number column
Let us now create the target variable which is the indicator for churn.
# Total September (month 9) data volume: 2G plus 3G megabytes per customer.
numChurn[["SepInternet"]] <- with(numChurn, vol_2g_mb_9 + vol_3g_mb_9)
The customers who have no call and internet data in the Sept month would be the ones who have churned
Proposition 1 : Let us take the revenue per month for three months and take its standard deviation and then see if there are any trends coming up
# Keep the customer id, the ARPU columns for months 6-8 and the churn target.
churnSamp1 <- select(numChurn, mobile_number, arpu_6, arpu_7, arpu_8, target)
# ARPU columns only: these are the inputs to the row-wise standard deviation.
churnSamp2 <- select(churnSamp1, arpu_6, arpu_7, arpu_8)
# Bring each month onto a comparable scale (caret "scale" pre-processing).
library(caret)
pp <- preProcess(churnSamp2, method = "scale")
churnSamp2_tran <- predict(pp, churnSamp2)
# SD across the three scaled months, one value per customer.
churnSamp2_tran <- transform(
  churnSamp2_tran,
  SD = apply(churnSamp2_tran, 1, sd, na.rm = TRUE)
)
churnSamp2_tran
NA
Let us add the mobile numbers to the data set and do some visualisation
# Take the id and the target from churnSamp1, the frame the ARPU columns were
# selected from, so every row stays aligned with its own customer. The original
# pulled the ids from the separate churnData table instead.
churnSamp2_tran$mobile <- churnSamp1$mobile_number
churnSamp2_tran$target <- churnSamp1$target
head(churnSamp2_tran)
Let us plot the standard deviation of the revenue and find some bins
library(ggplot2)
# One point per customer: mobile number on x, ARPU variability (SD) on y.
ggplot(churnSamp2_tran, aes(x = mobile, y = SD)) +
  geom_point()
Hypothesis 1. Too much flux in the revenue can indicate propensity to churn
Let us look at cases where the standard deviation is above 5
# Share of churners (mean of target) among customers whose ARPU SD exceeds 5.
churnSamp2_tran %>%
  filter(SD > 5) %>%
  mutate(Per = sum(target) / n()) %>%
  select(Per) %>%
  summarise(Mn = mean(Per, na.rm = TRUE))
NA
NA
NA
The number of customers who are likely to churn in the high sd range is almost 3 times the normal zone
Let us now look at the distribution of the SD
# Density of the per-customer ARPU standard deviation.
ggplot(churnSamp2_tran, aes(x = SD)) +
  geom_density()
# Quantiles of the same SD, used below as band edges.
quantile(
  churnSamp2_tran$SD,
  probs = c(0.05, 0.15, 0.25, 0.5, 0.6, 0.75, 0.85, 0.98)
)
5% 15% 25% 50% 60% 75% 85% 98%
0.04131759 0.08230600 0.12069221 0.23162946 0.29212411 0.42642087 0.58915619 1.43418310
Let us plot some tables based on these bins and see the % of customers within these bins
# Churn share per ARPU-SD band.
# The band edges are computed from the data instead of being pasted in as
# literals: the previously hard-coded thresholds no longer matched the quantiles
# printed above, so the bands did not correspond to the stated percentiles.
arpu_sd_breaks <- quantile(
  churnSamp2_tran$SD,
  probs = c(0.05, 0.15, 0.25, 0.5, 0.6, 0.75, 0.85, 0.98),
  na.rm = TRUE
)
churnSamp2_tran %>%
  filter(!is.na(SD)) %>%
  # findInterval() gives half-open bands [lower, upper), the same convention as
  # the original `SD >= lower & SD < upper` filters; band 1 is below the 5% edge.
  mutate(band = findInterval(SD, arpu_sd_breaks) + 1L) %>%
  group_by(band) %>%
  summarise(
    SD_from = min(SD),
    SD_to = max(SD),
    NUm = n(),
    Mn = sum(target) / n()
  )
NA
The standard deviation of the ARPU across months 6 to 8 seems to be a good indicator
# Show the first rows of numChurn.
head(numChurn)
Let us look at the network minutes of usage (MOU); the chunk below is run on the off-network (offnet) columns. My hypothesis is that there shouldn't be any change in the network MOU depending on whether a customer is churning or not
# Keep the customer id, the offnet MOU columns for months 6-8 and the target.
churnSamp1_hyp2 <- select(
  numChurn,
  mobile_number, offnet_mou_6, offnet_mou_7, offnet_mou_8, target
)
# Offnet columns only: inputs to the row-wise standard deviation.
churnSamp2_hyp2 <- select(churnSamp1_hyp2, offnet_mou_6, offnet_mou_7, offnet_mou_8)
# Missing usage is treated as zero minutes.
churnSamp2_hyp2[is.na(churnSamp2_hyp2)] <- 0
# Bring each month onto a comparable scale (caret "scale" pre-processing).
library(caret)
pp <- preProcess(churnSamp2_hyp2, method = "scale")
churnSamp2_hyp2 <- predict(pp, churnSamp2_hyp2)
churnSamp2_hyp2
NA
# Row-wise SD over the three scaled offnet columns only. Naming the columns
# keeps the chunk re-runnable: the original applied sd() to every column, so a
# second run would also fold the SD, mobile and target columns into the result.
offnet_cols <- c("offnet_mou_6", "offnet_mou_7", "offnet_mou_8")
churnSamp2_hyp2 <- transform(
  churnSamp2_hyp2,
  SD = apply(churnSamp2_hyp2[offnet_cols], 1, sd, na.rm = TRUE)
)
# Id and target come from churnSamp1_hyp2, the frame the offnet columns were
# selected from, so rows stay aligned with their own customer.
churnSamp2_hyp2$mobile <- churnSamp1_hyp2$mobile_number
churnSamp2_hyp2$target <- churnSamp1_hyp2$target
head(churnSamp2_hyp2)
# Quantiles of the offnet SD, used below as band edges.
quantile(
  churnSamp2_hyp2$SD,
  probs = c(0.05, 0.15, 0.25, 0.5, 0.6, 0.75, 0.85, 0.95, 0.99)
)
5% 15% 25% 50% 60% 75% 85% 95%
0.0007897178 0.0099430038 0.0207226499 0.0641469656 0.0966209515 0.1959115512 0.3696960453 1.0014883183
99%
2.3819037846
# Churn share per offnet-SD band.
# Band edges are computed from the data rather than pasted from printed output,
# so they cannot go stale when the input or the scaling changes.
offnet_sd_breaks <- quantile(
  churnSamp2_hyp2$SD,
  probs = c(0.05, 0.15, 0.25, 0.5, 0.6, 0.75, 0.85, 0.95, 0.99),
  na.rm = TRUE
)
churnSamp2_hyp2 %>%
  filter(!is.na(SD)) %>%
  # findInterval() gives half-open bands [lower, upper), the same convention as
  # the original `SD >= lower & SD < upper` filters; band 1 is below the 5% edge.
  mutate(band = findInterval(SD, offnet_sd_breaks) + 1L) %>%
  group_by(band) %>%
  summarise(
    SD_from = min(SD),
    SD_to = max(SD),
    NUm = n(),
    Mn = sum(target) / n()
  )
The behaviour with respect to network usage is found to be opposite of the behaviour which was found for average revenue.
Both the onnet and offnet usage behaviour has a parabolic relationship with churn.
# Show the first ten rows of churnData.
churnData[1:10,]
Let us look at roaming incoming and roaming outgoing relationships
Creating a function to do the job
#' Row-wise variability of three monthly columns and churn share per SD band
#'
#' Reads the global `numChurn`, replaces missing values in the three columns
#' with 0, scales each column with caret's "scale" pre-processing, and computes
#' the per-row standard deviation of the three scaled values. The rows are then
#' split into ten half-open bands `[lower, upper)` at the 5/15/25/50/60/75/85/
#' 95/99% quantiles of that SD, and each band is summarised.
#'
#' @param var1,var2,var3 Column names (strings) of `numChurn` to compare.
#' @return A named list: `df1` (scaled columns plus `SD`, `mobile`, `target`)
#'   and `val1` ... `val10`, one-row data frames with `NUm` (rows in the band)
#'   and `Mn` (sum of `target` divided by the row count) from the lowest to the
#'   highest SD band.
scalerFun <- function(var1, var2, var3) {
  month_cols <- c(var1, var2, var3)

  # all_of() states explicitly that the arguments are column names held in
  # strings; passing them bare to select() is deprecated and would pick a
  # column literally named "var1" if one existed.
  churnSamp1_hyp2 <- numChurn %>% select(all_of(c("mobile_number", month_cols)))
  churnSamp2_hyp2 <- churnSamp1_hyp2 %>% select(all_of(month_cols))
  churnSamp2_hyp2[is.na(churnSamp2_hyp2)] <- 0

  # Scaling the data
  pp <- preProcess(churnSamp2_hyp2, method = "scale")
  churnSamp2_hyp2 <- predict(pp, churnSamp2_hyp2)
  churnSamp2_hyp2 <- transform(
    churnSamp2_hyp2,
    SD = apply(churnSamp2_hyp2, 1, sd, na.rm = TRUE)
  )
  # Ids come from the same frame as the features (previously the separate
  # global churnData), so rows cannot get out of step.
  churnSamp2_hyp2$mobile <- churnSamp1_hyp2$mobile_number
  churnSamp2_hyp2$target <- numChurn$target

  # Quantile mapping
  quantvals <- unname(quantile(
    churnSamp2_hyp2$SD,
    c(.05, .15, .25, .5, .6, .75, .85, .95, .99),
    na.rm = TRUE
  ))
  # -Inf / Inf close the open-ended first and last bands.
  band_lower <- c(-Inf, quantvals)
  band_upper <- c(quantvals, Inf)

  # Row count and churn share for one [band_lo, band_hi) slice of SD.
  band_summary <- function(band_lo, band_hi) {
    churnSamp2_hyp2 %>%
      filter(SD >= band_lo & SD < band_hi) %>%
      mutate(Per = sum(target) / n()) %>%
      select(Per) %>%
      summarise(NUm = n(), Mn = mean(Per, na.rm = TRUE))
  }

  bands <- Map(band_summary, band_lower, band_upper)
  names(bands) <- paste0("val", seq_along(bands))
  c(list(df1 = churnSamp2_hyp2), bands)
}
Running the function for roaming values
# roam_ic_mou, months 6-8: per-customer SD and churn share per SD band.
roamSamp <- scalerFun("roam_ic_mou_6", "roam_ic_mou_7", "roam_ic_mou_8")
roamSamp$df1
# One table (band, NUm, Mn) instead of ten separate prints, so the bands can be
# compared side by side.
bind_rows(roamSamp[paste0("val", 1:10)], .id = "band")
NA
NA
NA
Roaming also is found to have the same behaviour as the other variables. The highest quantile values shows the maximum propensity to churn.
# Start the engineered-feature table directly from the first SD column. The row
# count now follows the data instead of a hard-coded 99999.
churnFeat1 <- data.frame(roam_ic_SD = roamSamp$df1$SD)
head(churnFeat1)
NA
# roam_og_mou, months 6-8: per-customer SD and churn share per SD band.
roamSamp <- scalerFun("roam_og_mou_6", "roam_og_mou_7", "roam_og_mou_8")
roamSamp$df1
# One table (band, NUm, Mn) instead of ten separate prints.
bind_rows(roamSamp[paste0("val", 1:10)], .id = "band")
# Assign by name: re-running the chunk overwrites this column instead of
# appending a duplicate and breaking a positional colnames() assignment.
churnFeat1[["roam_og_SD"]] <- roamSamp$df1$SD
head(churnFeat1)
NA
# loc_og_t2t_mou, months 6-8: per-customer SD and churn share per SD band.
roamSamp <- scalerFun("loc_og_t2t_mou_6", "loc_og_t2t_mou_7", "loc_og_t2t_mou_8")
roamSamp$df1
# One table (band, NUm, Mn) instead of ten separate prints.
bind_rows(roamSamp[paste0("val", 1:10)], .id = "band")
# Assign by name: re-running the chunk overwrites this column instead of
# appending a duplicate and breaking a positional colnames() assignment.
churnFeat1[["loc_og_t2t_SD"]] <- roamSamp$df1$SD
head(churnFeat1)
NA
Local calls within the same telecom operator's network show a reverse trend, where the low-flux zone has a higher probability of containing churn cases.
# loc_og_t2m_mou, months 6-8: per-customer SD and churn share per SD band.
roamSamp <- scalerFun("loc_og_t2m_mou_6", "loc_og_t2m_mou_7", "loc_og_t2m_mou_8")
roamSamp$df1
# One table (band, NUm, Mn) instead of ten separate prints.
bind_rows(roamSamp[paste0("val", 1:10)], .id = "band")
# Assign by name: re-running the chunk overwrites this column instead of
# appending a duplicate and breaking a positional colnames() assignment.
churnFeat1[["loc_og_t2m_SD"]] <- roamSamp$df1$SD
head(churnFeat1)
The proportion of the churn cases are more in the lower ranges of standard deviation
# loc_og_t2f_mou, months 6-8: per-customer SD and churn share per SD band.
roamSamp <- scalerFun("loc_og_t2f_mou_6", "loc_og_t2f_mou_7", "loc_og_t2f_mou_8")
roamSamp$df1
# One table (band, NUm, Mn) instead of ten separate prints.
bind_rows(roamSamp[paste0("val", 1:10)], .id = "band")
# Assign by name: re-running the chunk overwrites this column instead of
# appending a duplicate and breaking a positional colnames() assignment.
churnFeat1[["loc_og_t2f_SD"]] <- roamSamp$df1$SD
head(churnFeat1)
NA
Not much information in this feature.
Do calls to one's call centre indicate a propensity for churn?
# loc_og_t2c_mou, months 6-8: per-customer SD and churn share per SD band.
roamSamp <- scalerFun("loc_og_t2c_mou_6", "loc_og_t2c_mou_7", "loc_og_t2c_mou_8")
roamSamp$df1
# One table (band, NUm, Mn) instead of ten separate prints.
bind_rows(roamSamp[paste0("val", 1:10)], .id = "band")
# Assign by name: re-running the chunk overwrites this column instead of
# appending a duplicate and breaking a positional colnames() assignment.
churnFeat1[["loc_og_t2c_SD"]] <- roamSamp$df1$SD
head(churnFeat1)
NA
Not much information in this data point.
# loc_og_mou, months 6-8: per-customer SD and churn share per SD band.
roamSamp <- scalerFun("loc_og_mou_6", "loc_og_mou_7", "loc_og_mou_8")
roamSamp$df1
# One table (band, NUm, Mn) instead of ten separate prints.
bind_rows(roamSamp[paste0("val", 1:10)], .id = "band")
# Assign by name: re-running the chunk overwrites this column instead of
# appending a duplicate and breaking a positional colnames() assignment.
churnFeat1[["loc_og_SD"]] <- roamSamp$df1$SD
head(churnFeat1)
NA
Lower range of this has around 40% of churn cases.
# std_og_t2t_mou, months 6-8: per-customer SD and churn share per SD band.
roamSamp <- scalerFun("std_og_t2t_mou_6", "std_og_t2t_mou_7", "std_og_t2t_mou_8")
roamSamp$df1
# One table (band, NUm, Mn) instead of ten separate prints.
bind_rows(roamSamp[paste0("val", 1:10)], .id = "band")
# Assign by name: re-running the chunk overwrites this column instead of
# appending a duplicate and breaking a positional colnames() assignment.
churnFeat1[["std_og_t2t_SD"]] <- roamSamp$df1$SD
head(churnFeat1)
NA
The higher end of the spectrum shows the highest propensity to churn.Top three bands of the standard deviation has the highest propensity.
# std_og_t2m_mou, months 6-8: per-customer SD and churn share per SD band.
roamSamp <- scalerFun("std_og_t2m_mou_6", "std_og_t2m_mou_7", "std_og_t2m_mou_8")
roamSamp$df1
# One table (band, NUm, Mn) instead of ten separate prints.
bind_rows(roamSamp[paste0("val", 1:10)], .id = "band")
# Assign by name: re-running the chunk overwrites this column instead of
# appending a duplicate and breaking a positional colnames() assignment.
churnFeat1[["std_og_t2m_SD"]] <- roamSamp$df1$SD
head(churnFeat1)
NA
The top end of the spectrum has the highest propensity of churn.
# std_og_t2f_mou, months 6-8: per-customer SD and churn share per SD band.
roamSamp <- scalerFun("std_og_t2f_mou_6", "std_og_t2f_mou_7", "std_og_t2f_mou_8")
roamSamp$df1
# One table (band, NUm, Mn) instead of ten separate prints.
bind_rows(roamSamp[paste0("val", 1:10)], .id = "band")
# Assign by name: re-running the chunk overwrites this column instead of
# appending a duplicate and breaking a positional colnames() assignment.
churnFeat1[["std_og_t2f_SD"]] <- roamSamp$df1$SD
head(churnFeat1)
NA
As expected, the flux in fixed lines doesn't present any meaningful indicator at all.
# std_og_mou, months 6-8: per-customer SD and churn share per SD band.
roamSamp <- scalerFun("std_og_mou_6", "std_og_mou_7", "std_og_mou_8")
roamSamp$df1
# One table (band, NUm, Mn) instead of ten separate prints.
bind_rows(roamSamp[paste0("val", 1:10)], .id = "band")
# Assign by name: re-running the chunk overwrites this column instead of
# appending a duplicate and breaking a positional colnames() assignment.
churnFeat1[["std_og_SD"]] <- roamSamp$df1$SD
head(churnFeat1)
NA
There is a mild parabolic representation with the top standard deviation values having the higher probability of churn propensity.
# isd_og_mou, months 6-8: per-customer SD and churn share per SD band.
roamSamp <- scalerFun("isd_og_mou_6", "isd_og_mou_7", "isd_og_mou_8")
roamSamp$df1
# One table (band, NUm, Mn) instead of ten separate prints.
bind_rows(roamSamp[paste0("val", 1:10)], .id = "band")
# Assign by name: re-running the chunk overwrites this column instead of
# appending a duplicate and breaking a positional colnames() assignment.
churnFeat1[["isd_og_SD"]] <- roamSamp$df1$SD
head(churnFeat1)
NA
The highest band has the greatest probability of finding churn.
# spl_og_mou, months 6-8: per-customer SD and churn share per SD band.
roamSamp <- scalerFun("spl_og_mou_6", "spl_og_mou_7", "spl_og_mou_8")
roamSamp$df1
# One table (band, NUm, Mn) instead of ten separate prints.
bind_rows(roamSamp[paste0("val", 1:10)], .id = "band")
# Assign by name: re-running the chunk overwrites this column instead of
# appending a duplicate and breaking a positional colnames() assignment.
churnFeat1[["spl_og_SD"]] <- roamSamp$df1$SD
head(churnFeat1)
NA
Not much difference in this segment.
# og_others, months 6-8: per-customer SD and churn share per SD band.
roamSamp <- scalerFun("og_others_6", "og_others_7", "og_others_8")
roamSamp$df1
# One table (band, NUm, Mn) instead of ten separate prints.
bind_rows(roamSamp[paste0("val", 1:10)], .id = "band")
# Assign by name: re-running the chunk overwrites this column instead of
# appending a duplicate and breaking a positional colnames() assignment.
churnFeat1[["og_others_SD"]] <- roamSamp$df1$SD
head(churnFeat1)
The top one has the most propensity. However not much to talk about.
# total_og_mou, months 6-8: per-customer SD and churn share per SD band.
roamSamp <- scalerFun("total_og_mou_6", "total_og_mou_7", "total_og_mou_8")
roamSamp$df1
# One table (band, NUm, Mn) instead of ten separate prints.
bind_rows(roamSamp[paste0("val", 1:10)], .id = "band")
# Assign by name: re-running the chunk overwrites this column instead of
# appending a duplicate and breaking a positional colnames() assignment.
churnFeat1[["total_og_SD"]] <- roamSamp$df1$SD
head(churnFeat1)
NA
There is a parabolic relationship, with some of the highest representation at the bottom end of the SD range. The top end also shows some propensity for churn. The bottom end must be heavily influenced by some of the features in the 40% range.
# loc_ic_t2t_mou, months 6-8: per-customer SD and churn share per SD band.
roamSamp <- scalerFun("loc_ic_t2t_mou_6", "loc_ic_t2t_mou_7", "loc_ic_t2t_mou_8")
roamSamp$df1
# One table (band, NUm, Mn) instead of ten separate prints.
bind_rows(roamSamp[paste0("val", 1:10)], .id = "band")
# Assign by name: re-running the chunk overwrites this column instead of
# appending a duplicate and breaking a positional colnames() assignment.
churnFeat1[["loc_ic_t2t_SD"]] <- roamSamp$df1$SD
head(churnFeat1)
NA
The bottom 15000 cases has the highest propensity for churn 24%
The bottom has a very high percentage of churn about 43%.
##Local incoming telecom operator to a fixed line
# loc_ic_t2m_mou, months 6-8. This step was missing from the script even though
# the column list expected a 'loc_ic_t2m_SD' entry, so the positional
# colnames() assignment had one name more than churnFeat1 had columns and
# failed. NOTE(review): assumes numChurn carries the loc_ic_t2m_mou_* columns
# listed for churnData - confirm.
roamSamp <- scalerFun("loc_ic_t2m_mou_6", "loc_ic_t2m_mou_7", "loc_ic_t2m_mou_8")
bind_rows(roamSamp[paste0("val", 1:10)], .id = "band")
churnFeat1[["loc_ic_t2m_SD"]] <- roamSamp$df1$SD

# loc_ic_t2f_mou, months 6-8: per-customer SD and churn share per SD band.
roamSamp <- scalerFun("loc_ic_t2f_mou_6", "loc_ic_t2f_mou_7", "loc_ic_t2f_mou_8")
roamSamp$df1
# One table (band, NUm, Mn) instead of ten separate prints.
bind_rows(roamSamp[paste0("val", 1:10)], .id = "band")
# Assign by name so the column label can never drift from its content.
churnFeat1[["loc_ic_t2f_SD"]] <- roamSamp$df1$SD
head(churnFeat1)
NA
The bottom layer has the highest propensity for churn.
# loc_ic_mou, months 6-8: per-customer SD and churn share per SD band.
roamSamp <- scalerFun("loc_ic_mou_6", "loc_ic_mou_7", "loc_ic_mou_8")
roamSamp$df1
# One table (band, NUm, Mn) instead of ten separate prints.
bind_rows(roamSamp[paste0("val", 1:10)], .id = "band")
# Assign by name: independent of how many columns earlier steps added, and
# safe to re-run (a positional colnames() assignment fails on any mismatch).
churnFeat1[["loc_ic_SD"]] <- roamSamp$df1$SD
head(churnFeat1)
The bottom 5000 which has the least standard deviation has the higher proportion of churn with 42%.
# std_ic_t2t_mou, months 6-8: per-customer SD and churn share per SD band.
roamSamp <- scalerFun("std_ic_t2t_mou_6", "std_ic_t2t_mou_7", "std_ic_t2t_mou_8")
roamSamp$df1
# One table (band, NUm, Mn) instead of ten separate prints.
bind_rows(roamSamp[paste0("val", 1:10)], .id = "band")
# Assign by name: independent of how many columns earlier steps added, and
# safe to re-run (a positional colnames() assignment fails on any mismatch).
churnFeat1[["std_ic_t2t_SD"]] <- roamSamp$df1$SD
head(churnFeat1)
NA
STD incoming doesn't show much differentiation, and the churn percentage is evenly distributed.
# std_ic_t2m_mou, months 6-8: per-customer SD and churn share per SD band.
# The original re-ran scalerFun on the std_ic_t2t columns right after this
# call, so the column stored as 'std_ic_t2m_SD' actually held the t2t values.
# The duplicate call is removed so the stored SD matches its label.
roamSamp <- scalerFun("std_ic_t2m_mou_6", "std_ic_t2m_mou_7", "std_ic_t2m_mou_8")
roamSamp$df1
# One table (band, NUm, Mn) instead of ten separate prints.
bind_rows(roamSamp[paste0("val", 1:10)], .id = "band")
# Assign by name so the column label can never drift from its content.
churnFeat1[["std_ic_t2m_SD"]] <- roamSamp$df1$SD
head(churnFeat1)
NA
This is a parabolic distribution with the maximum propensity with the bottom band and high band. However the difference between bands is not that pronounced
# std_ic_t2f_mou, months 6-8: per-customer SD and churn share per SD band.
roamSamp <- scalerFun("std_ic_t2f_mou_6", "std_ic_t2f_mou_7", "std_ic_t2f_mou_8")
roamSamp$df1
# One table (band, NUm, Mn) instead of ten separate prints.
bind_rows(roamSamp[paste0("val", 1:10)], .id = "band")
# Assign by name: independent of how many columns earlier steps added, and
# safe to re-run (a positional colnames() assignment fails on any mismatch).
churnFeat1[["std_ic_t2f_SD"]] <- roamSamp$df1$SD
head(churnFeat1)
Not much of differentiation
# std_ic_mou, months 6-8: per-customer SD and churn share per SD band.
roamSamp <- scalerFun("std_ic_mou_6", "std_ic_mou_7", "std_ic_mou_8")
roamSamp$df1
# One table (band, NUm, Mn) instead of ten separate prints.
bind_rows(roamSamp[paste0("val", 1:10)], .id = "band")
# Assign by name: independent of how many columns earlier steps added, and
# safe to re-run (a positional colnames() assignment fails on any mismatch).
churnFeat1[["std_ic_SD"]] <- roamSamp$df1$SD
head(churnFeat1)
A parabolic relationship with the bottom end having the maximum propensity of churn
# total_ic_mou, months 6-8: per-customer SD and churn share per SD band.
roamSamp <- scalerFun("total_ic_mou_6", "total_ic_mou_7", "total_ic_mou_8")
roamSamp$df1
# One table (band, NUm, Mn) instead of ten separate prints.
bind_rows(roamSamp[paste0("val", 1:10)], .id = "band")
# Assign by name: independent of how many columns earlier steps added, and
# safe to re-run (a positional colnames() assignment fails on any mismatch).
churnFeat1[["total_ic_SD"]] <- roamSamp$df1$SD
head(churnFeat1)
NA
For total incoming, the bottom 5000 have the highest % of churn cases, at almost 40%. This could be driven by the feature with the 43% churn percentage
# spl_ic_mou, months 6-8: per-customer SD and churn share per SD band.
roamSamp <- scalerFun("spl_ic_mou_6", "spl_ic_mou_7", "spl_ic_mou_8")
roamSamp$df1
# One table (band, NUm, Mn) instead of ten separate prints.
bind_rows(roamSamp[paste0("val", 1:10)], .id = "band")
# Assign by name: independent of how many columns earlier steps added, and
# safe to re-run (a positional colnames() assignment fails on any mismatch).
churnFeat1[["spl_ic_SD"]] <- roamSamp$df1$SD
head(churnFeat1)
NA
NA
Nothing interesting going on here with the special calls.
# isd_ic_mou, months 6-8: per-customer SD and churn share per SD band.
roamSamp <- scalerFun("isd_ic_mou_6", "isd_ic_mou_7", "isd_ic_mou_8")
roamSamp$df1
# One table (band, NUm, Mn) instead of ten separate prints.
bind_rows(roamSamp[paste0("val", 1:10)], .id = "band")
# Assign by name: independent of how many columns earlier steps added, and
# safe to re-run (a positional colnames() assignment fails on any mismatch).
churnFeat1[["isd_ic_SD"]] <- roamSamp$df1$SD
head(churnFeat1)
NA
Nothing interesting going on here too
# ic_others, months 6-8: per-customer SD and churn share per SD band.
roamSamp <- scalerFun("ic_others_6", "ic_others_7", "ic_others_8")
roamSamp$df1
# One table (band, NUm, Mn) instead of ten separate prints.
bind_rows(roamSamp[paste0("val", 1:10)], .id = "band")
# Assign by name: independent of how many columns earlier steps added, and
# safe to re-run (a positional colnames() assignment fails on any mismatch).
churnFeat1[["ic_others_SD"]] <- roamSamp$df1$SD
head(churnFeat1)
NA
This is also not very promising feature
# total_rech_num, months 6-8: per-customer SD and churn share per SD band.
roamSamp <- scalerFun("total_rech_num_6", "total_rech_num_7", "total_rech_num_8")
roamSamp$df1
# One table (band, NUm, Mn) instead of ten separate prints.
bind_rows(roamSamp[paste0("val", 1:10)], .id = "band")
# Assign by name: independent of how many columns earlier steps added, and
# safe to re-run (a positional colnames() assignment fails on any mismatch).
churnFeat1[["total_rech_num_SD"]] <- roamSamp$df1$SD
head(churnFeat1)
NA
The higher SD values have larger proportion of churn cases 26% compared to the other cases.
# total_rech_amt, months 6-8: per-customer SD and churn share per SD band.
roamSamp <- scalerFun("total_rech_amt_6", "total_rech_amt_7", "total_rech_amt_8")
roamSamp$df1
# One table (band, NUm, Mn) instead of ten separate prints.
bind_rows(roamSamp[paste0("val", 1:10)], .id = "band")
# Assign by name: independent of how many columns earlier steps added, and
# safe to re-run (a positional colnames() assignment fails on any mismatch).
churnFeat1[["total_rech_amt_SD"]] <- roamSamp$df1$SD
head(churnFeat1)
NA
The recharge amount has similar trend as recharge numbers
# max_rech_amt, months 6-8: per-customer SD and churn share per SD band.
roamSamp <- scalerFun("max_rech_amt_6", "max_rech_amt_7", "max_rech_amt_8")
roamSamp$df1
# One table (band, NUm, Mn) instead of ten separate prints.
bind_rows(roamSamp[paste0("val", 1:10)], .id = "band")
# Assign by name: independent of how many columns earlier steps added, and
# safe to re-run (a positional colnames() assignment fails on any mismatch).
churnFeat1[["max_rech_amt_SD"]] <- roamSamp$df1$SD
head(churnFeat1)
NA
The trend here is not very pronounced, even though the higher SD bands have a higher percentage of churn cases.
# last_day_rch_amt, months 6-8: per-customer SD and churn share per SD band.
roamSamp <- scalerFun("last_day_rch_amt_6", "last_day_rch_amt_7", "last_day_rch_amt_8")
roamSamp$df1
# One table (band, NUm, Mn) instead of ten separate prints.
bind_rows(roamSamp[paste0("val", 1:10)], .id = "band")
# Assign by name: independent of how many columns earlier steps added, and
# safe to re-run (a positional colnames() assignment fails on any mismatch).
churnFeat1[["last_day_rch_amt_SD"]] <- roamSamp$df1$SD
head(churnFeat1)
NA
Not a very informative feature
# Standard-deviation feature for the total_rech_data_6/_7/_8 columns.
# scalerFun is defined elsewhere in this notebook; as used here, its result
# carries a data frame `df1` (with an `SD` column) and items val1..val10.
roamSamp <- scalerFun(
  'total_rech_data_6', 'total_rech_data_7', 'total_rech_data_8'
)
roamSamp$df1
# Show the ten summary items one by one (presumably per-band churn summaries,
# going by the notes in this notebook -- TODO confirm against scalerFun).
roamSamp$val1
roamSamp$val2
roamSamp$val3
roamSamp$val4
roamSamp$val5
roamSamp$val6
roamSamp$val7
roamSamp$val8
roamSamp$val9
roamSamp$val10
# Name the column as it is appended rather than re-typing every column name:
# existing names stay untouched, so a typo or miscount in a long literal
# vector can no longer silently rename earlier features.
churnFeat1 <- cbind(churnFeat1, total_rech_data_SD = roamSamp$df1$SD)
head(churnFeat1)
NA
NA
This is not a very informative feature.
# Standard-deviation feature for the max_rech_data_6/_7/_8 columns.
# scalerFun is defined elsewhere in this notebook; as used here, its result
# carries a data frame `df1` (with an `SD` column) and items val1..val10.
roamSamp <- scalerFun('max_rech_data_6', 'max_rech_data_7', 'max_rech_data_8')
roamSamp$df1
# Show the ten summary items one by one (presumably per-band churn summaries,
# going by the notes in this notebook -- TODO confirm against scalerFun).
roamSamp$val1
roamSamp$val2
roamSamp$val3
roamSamp$val4
roamSamp$val5
roamSamp$val6
roamSamp$val7
roamSamp$val8
roamSamp$val9
roamSamp$val10
# Name the column as it is appended rather than re-typing every column name:
# existing names stay untouched, so a typo or miscount in a long literal
# vector can no longer silently rename earlier features.
churnFeat1 <- cbind(churnFeat1, max_rech_data_SD = roamSamp$df1$SD)
head(churnFeat1)
NA
NA
This standard-deviation strategy is not very informative for these features.
# Standard-deviation feature for the jun/jul/aug vbc_3g columns.
# scalerFun is defined elsewhere in this notebook; as used here, its result
# carries a data frame `df1` (with an `SD` column) and items val1..val10.
roamSamp <- scalerFun('jun_vbc_3g', 'jul_vbc_3g', 'aug_vbc_3g')
roamSamp$df1
# Show the ten summary items one by one (presumably per-band churn summaries,
# going by the notes in this notebook -- TODO confirm against scalerFun).
roamSamp$val1
roamSamp$val2
roamSamp$val3
roamSamp$val4
roamSamp$val5
roamSamp$val6
roamSamp$val7
roamSamp$val8
roamSamp$val9
roamSamp$val10
# Name the column as it is appended rather than re-typing every column name:
# existing names stay untouched, so a typo or miscount in a long literal
# vector can no longer silently rename earlier features.
# NOTE(review): the name says "aug" although the inputs are the jun, jul and
# aug columns; it is kept unchanged because later code may refer to it.
churnFeat1 <- cbind(churnFeat1, aug_vbc_SD = roamSamp$df1$SD)
head(churnFeat1)
NA
count_rech_2g_6: not informative; count_rech_3g_6: not informative; av_rech_amt_data_6; vol_2g_mb_6 / 3g; arpu_3g_8 / 2g; night_pck_user_8
# Peek at the first ten rows of the raw churn data.
churnData[1:10, ]

# Seed the random number generator before fitting.
set.seed(123)

# Fit a glmnet model of churn on every other column of churnFeat1.
# `train`, `metric` and `trainC` come from elsewhere in this notebook
# (train is presumably caret::train -- TODO confirm).
fit.glmnet <- train(
  churn ~ .,
  data = churnFeat1,
  method = 'glmnet',
  metric = metric,
  trControl = trainC
)
1 package is needed for this model and is not installed. (glmnet). Would you like to try to install it now?
1: yes
2: no
# List the distinct values found in the churn column of churnFeat1.
unique(churnFeat1$churn)
[1] 1 0
Levels: 0 1